18  G-g Difference (KS)

Purpose: Explore effects of monthly/annual water availability on Big-little difference using non-parametric Kolmogorov-Smirnov tests

18.1 Site info and daily data

Code
# site information and locations
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)
mapview(siteinfo_sp, zcol = "designation")
Code
# flow/yield (and temp) data 
dat <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv") %>%
  filter(!site_name %in% c("WoundedBuckCreek", "Brackett Creek"))

# add water/climate year variables
dat <- add_date_variables(dat, dates = date, water_year_start = 4)

18.2 Separate and join G-g

Code
# pull out big G data
dat_big <- dat %>% filter(site_name %in% c("West Brook NWIS", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "BigCreekLower", "Shields River ab Smith NWIS", "Donner Blitzen River nr Frenchglen NWIS")) 

# pull out little G data, assign big G site name
dat_little <- dat %>% 
  filter(designation %in% c("little", "medium"), !subbasin %in% c("Coal Creek", "McGee Creek", "Duck Creek", "Flathead"),
         !site_name %in% c("West Brook NWIS", "West Brook 0", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "Shields River ab Smith NWIS", "BigCreekLower")) %>%
  mutate(site_name_big = ifelse(subbasin == "Big Creek", "BigCreekLower",
                                ifelse(subbasin == "West Brook", "West Brook NWIS", 
                                       ifelse(subbasin == "Paine Run", "Paine Run 10", 
                                              ifelse(subbasin == "Piney River", "Piney River 10",
                                                     ifelse(subbasin == "Staunton River", "Staunton River 10",
                                                            ifelse(subbasin == "Shields River", "Shields River ab Smith NWIS",
                                                                   ifelse(subbasin == "Snake River", "Spread Creek Dam", "Donner Blitzen River nr Frenchglen NWIS"))))))))

# Join big-little discharge data
dat_Gg <- dat_little %>%
  select(site_name, site_name_big, basin, subbasin, date, Month, MonthName, WaterYear, Yield_filled_mm_7) %>% rename(yield_little = Yield_filled_mm_7) %>%
  left_join(dat_big %>% select(site_name, date, Yield_filled_mm_7) %>% rename(site_name_big = site_name, yield_big = Yield_filled_mm_7)) %>%
  drop_na() %>% mutate(MonthName = as.character(MonthName))

# table
dat_Gg %>% group_by(subbasin) %>% summarize(site_name_big = unique(site_name_big)) %>% kable(caption = "Big G gage names for each focal sub-basin.")
Big G gage names for each focal sub-basin.
subbasin site_name_big
Big Creek BigCreekLower
Donner Blitzen Donner Blitzen River nr Frenchglen NWIS
Paine Run Paine Run 10
Shields River Shields River ab Smith NWIS
Snake River Spread Creek Dam
Staunton River Staunton River 10
West Brook West Brook NWIS

View unique little g site names

Code
sort(unique(dat_Gg$site_name))
 [1] "Avery Brook"                      "Avery Brook NWIS"                
 [3] "Big Creek NWIS"                   "BigCreekMiddle"                  
 [5] "BigCreekUpper"                    "Buck Creek"                      
 [7] "Crandall Creek"                   "Deep Creek"                      
 [9] "Donner Blitzen ab Fish NWIS"      "Donner Blitzen ab Indian NWIS"   
[11] "Donner Blitzen nr Burnt Car NWIS" "Dugout Creek"                    
[13] "Dugout Creek NWIS"                "Fish Creek NWIS"                 
[15] "Grizzly Creek"                    "Grouse Creek"                    
[17] "Hallowat Creek NWIS"              "HallowattCreekLower"             
[19] "Jimmy Brook"                      "LangfordCreekLower"              
[21] "LangfordCreekUpper"               "Leidy Creek Mouth"               
[23] "Leidy Creek Mouth NWIS"           "Leidy Creek Upper"               
[25] "Lodgepole Creek"                  "Mitchell Brook"                  
[27] "NF Spread Creek Lower"            "NF Spread Creek Upper"           
[29] "NicolaCreek"                      "Obear Brook Lower"               
[31] "Paine Run 01"                     "Paine Run 02"                    
[33] "Paine Run 06"                     "Paine Run 07"                    
[35] "Paine Run 08"                     "Rock Creek"                      
[37] "Sanderson Brook"                  "SF Spread Creek Lower"           
[39] "SF Spread Creek Lower NWIS"       "SF Spread Creek Upper"           
[41] "Shields River ab Dugout"          "SkookoleelCreek"                 
[43] "Staunton River 02"                "Staunton River 03"               
[45] "Staunton River 06"                "Staunton River 07"               
[47] "Staunton River 09"                "WernerCreek"                     
[49] "West Brook Lower"                 "West Brook Reservoir"            
[51] "West Brook Upper"                 "West Whately Brook"              

18.3 Compute Gg Difference

Code
mysites <- unique(dat_Gg$site_name)
kscompare_list <- list()
for (i in 1:length(mysites)) {
  d <- dat_Gg %>% filter(site_name == mysites[i])
  yrs <- unique(d$WaterYear)
  kscompare_list_yr <- list()
  for(j in 1:length(yrs)) {
    dy <- d %>% filter(WaterYear == yrs[j])
    mypvals_monthly <- tibble(site_name = rep(NA, times = 12), 
                              site_name_big = rep(NA, times = 12), 
                              WaterYear = rep(NA, times = 12),
                              Month = rep(NA, times = 12),
                              MonthName = rep(NA, times = 12),
                              days = rep(NA, times = 12),
                              stat = rep(NA, times = 12),
                              pval = rep(NA, times = 12))
    for(k in 1:12) {
      dym <- dy %>% filter(Month == k)
      if(dim(dym)[1] < 25) next
      mytest <- ks.test(dym$yield_little, dym$yield_big, exact = TRUE)
      mypvals_monthly$site_name[k] <- mysites[i]
      mypvals_monthly$site_name_big[k] <- unique(dym$site_name_big)
      mypvals_monthly$WaterYear[k] <- yrs[j]
      mypvals_monthly$Month[k] <- k
      mypvals_monthly$MonthName[k] <- unique(dym$MonthName)
      mypvals_monthly$days[k] <- dim(dym)[1]
      mypvals_monthly$stat[k] <- mytest$statistic
      mypvals_monthly$pval[k] <- mytest$p.value
      }
    kscompare_list_yr[[j]] <- mypvals_monthly
    }
  kscompare_list[[i]] <- do.call(rbind, kscompare_list_yr)
}
kscompare <- do.call(rbind, kscompare_list) %>% drop_na()

View relationship between KS test statistic and p-value

Code
plot(pval ~ stat, kscompare, xlab = "KS test statistic", ylab = "p-value")

View distribution of KS test statistics and p-values

Code
hist(kscompare$stat)

Code
hist(kscompare$pval)

18.4 Calculate total yield

Code
# Get total annual and monthly Big G yield and join to KS diffs

# get total annual yield by big G site
totyield_annual <- dat_big %>% 
  group_by(site_name, WaterYear, basin, subbasin) %>% 
  summarize(days = n(), yield_annual = sum(Yield_filled_mm, na.rm = TRUE)) %>%
  filter(!is.na(yield_annual), days >= 360) %>%
  ungroup() %>%
  rename(site_name_big = site_name) %>%
  select(site_name_big, WaterYear, yield_annual) %>%
  ungroup()

# get total monthly yield by big G site
totyield_monthly <- dat_big %>% 
  group_by(site_name, WaterYear, basin, subbasin, Month) %>% 
  summarize(days = n(), yield_monthly = sum(Yield_filled_mm, na.rm = TRUE)) %>%
  filter(!is.na(yield_monthly), days >= 28) %>%
  ungroup() %>%
  rename(site_name_big = site_name) %>%
  select(site_name_big, WaterYear, Month, yield_monthly) %>%
  ungroup()

18.5 Explore Gg diff by water availability

Hypothesis: during wetter periods (months/years) flow regimes become more similar among locations within stream networks (i.e., positive relationship between monthly/annual total yield and KS-test p-value)

How might this vary among basins that differ in primary water source (rain vs. snow) and among sites within basins (due to surface vs. subsurface controls on flow)?

Code
# join KS comparison df with annual/monthly water availability
kscompare <- kscompare %>% left_join(totyield_annual) %>% left_join(totyield_monthly) %>%
  mutate(MonthName = factor(MonthName, levels = c("Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar"))) %>%
  left_join(siteinfo %>% select(site_name, basin))

18.5.1 Global relationship

Plot relationship between log(monthly total yield) and log(KS p-value) for all basins, sites, and months combined

Code
kscompare %>% ggplot(aes(x = log(yield_monthly), y = log(pval))) + 
  geom_point(aes(group = MonthName, color = MonthName)) + 
  geom_smooth(method = "lm") + geom_abline(slope = 0, intercept = log(0.05), linetype = "dashed")

18.5.2 Basin-level effects

Code
# Plot all sites, facet by basin
kscompare %>% ggplot(aes(x = log(yield_monthly), y = log(pval))) + 
  geom_point(aes(group = MonthName, color = MonthName)) + facet_wrap(~ basin) + 
  geom_smooth(method = "lm") + geom_abline(slope = 0, intercept = log(0.05), linetype = "dashed")

18.5.3 Basin and site effects

Code
mybasins <- unique(kscompare$basin)
myplots <- list()
for (i in 1:length(mybasins)) {
  myplots[[i]] <- kscompare %>% 
    filter(basin == mybasins[i]) %>%
    ggplot(aes(x = log(yield_monthly), y = log(pval))) + 
    geom_point(aes(group = MonthName, color = MonthName)) + 
    facet_wrap(~ site_name) + 
    geom_smooth(method = "lm") + 
    geom_abline(slope = 0, intercept = log(0.05), linetype = "dashed")
}